This dataset originates from a nutrigenomic study in mice (Martin et al., 2007), designed to investigate how dietary fatty acid composition influences liver lipid profiles and hepatic gene expression.
Experimental Design
Forty mice were cross-classified according to two experimental factors, each with multiple levels:
Genotype (2 levels):
Wild-type (WT)
PPARα knockout (PPARα -/-)
Diet (5 levels):
REF: Reference diet (50/50 corn and colza oils)
COC: Saturated fatty acid-rich diet (hydrogenated coconut oil)
SUN: Omega-6-rich diet (sunflower oil)
LIN: Omega-3-rich diet (linseed oil)
FISH: Mixed diet (corn/colza/enriched fish oils in 43/43/14 ratio)
Each genotype-diet combination includes four biological replicates.
Variables Collected
Two sets of variables were measured for each mouse:
Expression levels of 120 selected hepatic genes, chosen from ~30,000
candidates for their relevance to nutritional regulation.
Measured using nylon macroarrays with radioactive labeling.
Relative concentrations (percentages) of 21 hepatic fatty
acids.
Quantified via gas chromatography.
data("nutrimouse")
genes <- nutrimouse$gene
lipids <- nutrimouse$lipid
metadata <- data.frame(genotype = nutrimouse$genotype, diet = nutrimouse$diet)
metadata$sample_name <- paste0(rownames(metadata), "_", metadata$genotype, "_", metadata$diet)
rownames(genes) <- metadata$sample_name
rownames(lipids) <- metadata$sample_name
note: The gene expression data is reduced to 20 genes so that the number of variables is less than the number of samples, to perform an unregularized CCA.
nutrimouse$gene_selected <- nutrimouse$gene[,1:20]
str(nutrimouse$gene_selected)
## 'data.frame': 40 obs. of 20 variables:
## $ X36b4: num -0.42 -0.44 -0.48 -0.45 -0.42 -0.43 -0.53 -0.49 -0.36 -0.5 ...
## $ ACAT1: num -0.65 -0.68 -0.74 -0.69 -0.71 -0.69 -0.62 -0.69 -0.66 -0.62 ...
## $ ACAT2: num -0.84 -0.91 -1.1 -0.65 -0.54 -0.8 -1 -0.91 -0.74 -0.79 ...
## $ ACBP : num -0.34 -0.32 -0.46 -0.41 -0.38 -0.32 -0.44 -0.37 -0.39 -0.36 ...
## $ ACC1 : num -1.29 -1.23 -1.3 -1.26 -1.21 -1.13 -1.22 -1.29 -1.15 -1.21 ...
## $ ACC2 : num -1.13 -1.06 -1.09 -1.09 -0.89 -0.79 -1 -1.06 -1.08 -0.82 ...
## $ ACOTH: num -0.93 -0.99 -1.06 -0.93 -1 -0.93 -0.94 -1.05 -0.88 -0.92 ...
## $ ADISP: num -0.98 -0.97 -1.08 -1.02 -0.95 -0.97 -0.94 -1.02 -0.98 -0.99 ...
## $ ADSS1: num -1.19 -1 -1.18 -1.07 -1.08 -1.07 -1.05 -1.16 -1.05 -1 ...
## $ ALDH3: num -0.68 -0.62 -0.75 -0.71 -0.76 -0.75 -0.67 -0.75 -0.66 -0.69 ...
## $ AM2R : num -0.59 -0.58 -0.66 -0.65 -0.59 -0.55 -0.66 -0.66 -0.53 -0.62 ...
## $ AOX : num -0.16 -0.12 -0.16 -0.17 -0.31 -0.23 -0.09 -0.22 -0.06 -0.23 ...
## $ BACT : num -0.22 -0.32 -0.32 -0.32 -0.31 -0.29 -0.25 -0.21 -0.15 -0.2 ...
## $ BIEN : num -0.89 -0.88 -0.89 -0.77 -0.97 -0.84 -0.86 -0.9 -0.74 -0.76 ...
## $ BSEP : num -0.69 -0.6 -0.7 -0.67 -0.68 -0.55 -0.67 -0.66 -0.6 -0.58 ...
## $ Bcl.3: num -1.18 -1.07 -1.17 -1.12 -0.93 -1.08 -1.03 -1.01 -1.01 -1.1 ...
## $ C16SR: num 1.66 1.65 1.57 1.61 1.66 1.7 1.58 1.62 1.72 1.55 ...
## $ CACP : num -0.92 -0.87 -1.02 -0.89 -0.93 -0.97 -0.97 -0.96 -0.85 -0.95 ...
## $ CAR1 : num -0.97 -0.92 -0.98 -0.97 -1.06 -1.03 -0.91 -1.11 -0.85 -0.99 ...
## $ CBS : num -0.26 -0.36 -0.4 -0.39 -0.35 -0.31 -0.32 -0.4 -0.26 -0.39 ...
cca.res <- rcc(X=nutrimouse$gene_selected, Y=nutrimouse$lipid)
The sample distribution plot can be performed with variates, sample coordinates in the new reference (rotated axes) for each of the two blocks.
cca.res_scores_genes <- data.frame(metadata, cca.res$variates$X)
ggplot(cca.res_scores_genes, aes(x=X1, y=X2, col=diet, shape = genotype)) +
geom_point(size=4) +
labs(title = "scores plots on Dim.1 vs Dim.2 - genes") +
theme_light()
cca.res_scores_lipids <- data.frame(metadata, cca.res$variates$Y)
ggplot(cca.res_scores_lipids, aes(x=X1, y=X2, col=diet, shape = genotype)) +
geom_point(size=4) +
labs(title = "scores plots on Dim.1 vs Dim.2 - lipids") +
theme_light()
Variable relationship is obtained from loadings or
with plotVar.
cca.res_loadings_genes <- data.frame(cca.res$loadings$X)
cca.res_loadings_genes$variable <- rownames(cca.res_loadings_genes)
ggplot(cca.res_loadings_genes, aes(x=X1, y=X2, label=variable)) +
geom_point() +
geom_text_repel() +
labs(title = "loadings plots on Dim.1 Dim.2 - genes") +
theme_light()
cca.res_loadings_lipids <- data.frame(cca.res$loadings$Y)
cca.res_loadings_lipids$variable <- rownames(cca.res_loadings_lipids)
ggplot(cca.res_loadings_lipids, aes(x=X1, y=X2, label=variable)) +
geom_point() +
geom_text_repel() +
labs(title = "loadings plots on Dim.1 Dim.2 - lipids") +
theme_light()
plotVar(cca.res)
cca.res.scale <- rcc(X=scale(nutrimouse$gene_selected, center=T, scale=T),
Y=scale(nutrimouse$lipid, center=T, scale=T), ncomp=2)
cca.res.scale_scores_genes <- data.frame(metadata, cca.res.scale$variates$X)
ggplot(cca.res.scale_scores_genes, aes(x=X1, y=X2, col=diet, shape = genotype)) +
geom_point(size=4) +
labs(title = "scores plots on Dim.1 vs Dim.2 - scaled genes") +
theme_light()
ggplot(cca.res_scores_genes, aes(x=X1, y=X2, col=diet, shape = genotype)) +
geom_point(size=4) +
labs(title = "scores plots on Dim.1 vs Dim.2 - genes") +
theme_light()
cca.res.scale_scores_lipids <- data.frame(metadata, cca.res.scale$variates$Y)
ggplot(cca.res_scores_lipids, aes(x=X1, y=X2, col=diet, shape = genotype)) +
geom_point(size=4) +
labs(title = "scores plots on Dim.1 vs Dim.2 - scaled lipids") +
theme_light()
ggplot(cca.res_scores_lipids, aes(x=X1, y=X2, col=diet, shape = genotype)) +
geom_point(size=4) +
labs(title = "scores plots on Dim.1 vs Dim.2 - lipids") +
theme_light()
cca.res.scale_loadings_genes <- data.frame(cca.res.scale$loadings$X)
cca.res.scale_loadings_genes$variable <- rownames(cca.res.scale_loadings_genes)
ggplot(cca.res.scale_loadings_genes, aes(x=X1, y=X2, label=variable)) +
geom_point() +
geom_text_repel() +
labs(title = "loadings plots on Dim.1 Dim.2 - scaled genes") +
theme_light()
ggplot(cca.res_loadings_genes, aes(x=X1, y=X2, label=variable)) +
geom_point() +
geom_text_repel() +
labs(title = "loadings plots on Dim.1 Dim.2 - genes") +
theme_light()
cca.res.scale_loadings_lipids <- data.frame(cca.res.scale$loadings$Y)
cca.res.scale_loadings_lipids$variable <- rownames(cca.res.scale_loadings_lipids)
ggplot(cca.res.scale_loadings_lipids, aes(x=X1, y=X2, label=variable)) +
geom_point() +
geom_text_repel() +
labs(title = "loadings plots on Dim.1 Dim.2 - scaled lipids") +
theme_light()
ggplot(cca.res_loadings_lipids, aes(x=X1, y=X2, label=variable)) +
geom_point() +
geom_text_repel() +
labs(title = "loadings plots on Dim.1 Dim.2 - lipids") +
theme_light()
plotVar(cca.res)
rcca.res <- rcc(X=nutrimouse$gene, Y=nutrimouse$lipid, ncomp=2, method="shrinkage")
rcca.res_scores_genes <- data.frame(metadata, rcca.res$variates$X)
ggplot(rcca.res_scores_genes, aes(x=X1, y=X2, col=diet, shape = genotype)) +
geom_point(size=4) +
labs(title = "scores plots on Dim.1 vs Dim.2 - regularized cca - genes") +
theme_light()
ggplot(cca.res_scores_genes, aes(x=X1, y=X2, col=diet, shape = genotype)) +
geom_point(size=4) +
labs(title = "scores plots on Dim.1 vs Dim.2 - genes") +
theme_light()
rcca.res_scores_lipids <- data.frame(metadata, rcca.res$variates$Y)
ggplot(rcca.res_scores_lipids, aes(x=X1, y=X2, col=diet, shape = genotype)) +
geom_point(size=4) +
labs(title = "scores plots on Dim.1 vs Dim.2 - regularized cca - lipids") +
theme_light()
ggplot(cca.res_scores_lipids, aes(x=X1, y=X2, col=diet, shape = genotype)) +
geom_point(size=4) +
labs(title = "scores plots on Dim.1 vs Dim.2 - lipids") +
theme_light()
rcca.res_loadings_genes <- data.frame(rcca.res$loadings$X)
rcca.res_loadings_genes$variable <- rownames(rcca.res_loadings_genes)
ggplot(rcca.res_loadings_genes, aes(x=X1, y=X2, label=variable)) +
geom_point() +
geom_text_repel() +
labs(title = "loadings plots on Dim.1 Dim.2 - regularized cca - genes") +
theme_light()
ggplot(cca.res_loadings_genes, aes(x=X1, y=X2, label=variable)) +
geom_point() +
geom_text_repel() +
labs(title = "loadings plots on Dim.1 Dim.2 - genes") +
theme_light()
rcca.res_loadings_lipids <- data.frame(rcca.res$loadings$Y)
rcca.res_loadings_lipids$variable <- rownames(rcca.res_loadings_lipids)
ggplot(rcca.res_loadings_lipids, aes(x=X1, y=X2, label=variable)) +
geom_point() +
geom_text_repel() +
labs(title = "loadings plots on Dim.1 Dim.2 - regularized cca - lipids") +
theme_light()
ggplot(cca.res_loadings_lipids, aes(x=X1, y=X2, label=variable)) +
geom_point() +
geom_text_repel() +
labs(title = "loadings plots on Dim.1 Dim.2 - lipids") +
theme_light()
plotVar(rcca.res)